This script analyzes grassland bird diverity in restored prairies at Crow-Hassan Park. It assesses how diversity has changed over time and how it is related to a variety of factors including climate, area, and management regimes. Findings from these analyses were presented at the Minnesota Ornithologists’ Union 2023 Paper Session.
library(dplyr)
library(ggplot2)
library(ggthemes)
library(kableExtra)
library(sf)
library(readr)
library(here)
library(vegan)
library(gridExtra)
library(insight)
library(performance)
library(tidyverse)
library(glmmTMB)
library(DHARMa)
library(snakecase)
source(here("Scripts", "1_Data_Import.R"))
source(here("Scripts", "2_Spatial_Data_Import.R"))
source(here("Scripts", "3_Data_Cleaning.R"))
source(here("Scripts", "4_Covariate_Data_Prep.R"))
source(here("Scripts", "5_Prep_Species.R"))# Check for erroneous records where BirdCount10 is greater than BirdCount5
erroneous_records <- bird.tblBirds_PointCounts %>%
filter(BirdCount10 < BirdCount05)
# Combine transect and point count data, addressing erroneous records. By taking max of BirdCount05 and BirdCount10 we at least partially address erroneous records above. This chunk also drops rows with 0 counts (which represent species only counted outside survey area), as well
dat <- bind_rows(bird.tblBirds_Transects, bird.tblBirds_PointCounts) %>%
rowwise() %>%
mutate(count = max(BirdCountIn, BirdCount05, BirdCount10, na.rm = TRUE)) %>%
ungroup() %>%
filter(count > 0) %>%
filter(!SurveyCode %in% c("CHBB14", "CHOA14", "CHOA52")) # Drop surveys of prairie remnant (not restored) and CHOA (not restored at time of surveys)
# Calculate diversity metrics
survey_diversity <- dat %>%
group_by(SurveyCode, SurveyDate) %>%
summarize(
richness = specnumber(count),
shannon = diversity(count, index = "shannon"),
simpson = diversity(count, index = "simpson"),
invsimpson = 1 / simpson,
total_individual_birds = sum(count)
)
# Plot diversity metrics (collector curves)
# survey_diversity %>%
# pivot_longer(cols = c(richness, shannon, invsimpson, simpson),
# names_to = "metric") %>%
# ggplot(aes(x = total_individual_birds, y = value)) +
# geom_point() +
# geom_smooth() +
# facet_wrap(~metric, nrow = 4, scales = "free_y")
# Calculate diversity metrics for grassland species only
survey_diversity_prairie <- dat %>%
filter(BirdCode %in% prairie_sp) %>%
group_by(SurveyCode, SurveyDate) %>%
summarize(
prairie_richness = specnumber(count),
prairie_shannon = diversity(count, index = "shannon"),
prairie_simpson = diversity(count, index = "simpson"),
prairie_invsimpson = 1 / prairie_simpson,
prairie_total_individual_birds = sum(count)
)
# Plot diversity metrics for prairie species (collector curves)
# survey_diversity_prairie %>%
# pivot_longer(cols = c(prairie_richness, prairie_shannon, prairie_invsimpson, prairie_simpson),
# names_to = "metric") %>%
# ggplot(aes(x = prairie_total_individual_birds, y = value)) +
# geom_point() +
# geom_smooth() +
# facet_wrap(~metric, nrow = 4, scales = "free_y")
# Merge overall and prairie diversity metrics, replacing NAs with 0 for prairie-specific metrics
survey_diversity <- left_join(survey_diversity, survey_diversity_prairie,
by = join_by(SurveyCode, SurveyDate)
) %>%
replace_na(list(
prairie_richness = 0,
prairie_shannon = 0,
prairie_simpson = 0,
prairie_invsimpson = Inf,
prairie_total_individual_birds = 0
))# Append survey details
survey_diversity <- survey_diversity %>%
left_join(bird.tblSurveyDefn, by = join_by(SurveyCode)) %>%
mutate(
year = sub("-.*", "", SurveyDate),
year = as.numeric(year)
)
survey_diversity <- survey_diversity %>%
mutate(site_year = paste0(SurveyCode, "-", year))
# Append total area planted on landscape & spring precipitation to surveys
survey_diversity_CHPR <- survey_diversity %>%
filter(ParkCode == "CHPR" & HabCode == "OA") %>%
left_join(area_by_year, by = join_by(year)) %>%
left_join(spring_precip, by = join_by(year))
# Graph of cumulative restored area
ggplot(data = area_by_year, aes(x = year, y = total_area_m2 / 4046.86)) +
geom_line(size = 1.3) +
theme_clean() +
labs(
title = "Restored prairie area, Crow-Hassan Park Reserve",
subtitle = "Crow-Hassan Park Reserve",
x = "Year",
y = "Cumulative restored area (acres)"
) +
theme(
plot.title = element_text(size = 11, face = "bold"),
plot.subtitle = element_text(size = 10, face = "italic"),
legend.title = element_text(size = 10, face = "plain")
)# Graph of spring precipitation
ggplot(data = spring_precip, aes(x = year, y = spring_precip_mm)) +
geom_line() +
geom_point() +
# geom_smooth(method = "lm", formula = y~x, se = FALSE) +
theme_clean() +
labs(
title = "Total spring precipitation, March-May",
subtitle = "Crow-Hassan Park Reserve",
x = "Year",
y = "Precipitation (mm)"
) +
theme(
plot.title = element_text(size = 11, face = "bold"),
plot.subtitle = element_text(size = 10, face = "italic"),
legend.title = element_text(size = 10, face = "plain")
)# Classify surveys as stationary or moving
survey_diversity_CHPR <- survey_diversity_CHPR %>%
mutate(movement = ifelse(TypeCode == "PC", "Stationary", "Traveling"))
# Merge BB and OA surveys that are actually the same transect
survey_diversity_CHPR$SurveyCode[which(survey_diversity_CHPR$SurveyCode == "CHBB02")] <- "CHOA02"
survey_diversity_CHPR$SurveyCode[which(survey_diversity_CHPR$SurveyCode == "CHBB09")] <- "CHOA09"
survey_diversity_CHPR$SurveyCode[which(survey_diversity_CHPR$SurveyCode == "CHBB14")] <- "CHOA14"
survey_diversity_CHPR$SurveyCode[which(survey_diversity_CHPR$SurveyCode == "CHBB16")] <- "CHOA14"
survey_diversity_CHPR$SurveyCode[which(survey_diversity_CHPR$SurveyCode == "CHBB01")] <- "CHOA01"
# Generate dataframe with the year each unit was planted
year_planted <- data.frame(
Survey_ID = c("CHOA01", "CHOA02", "CHOA03", "CHOA04", "CHOA05", "CHOA06", "CHOA09", "CHOA10", "CHOA14", "CHOA49", "CHOA52", "CHOA78", "CHPC06", "CHPC07", "CHPC08", "CHPC09", "CHPC10"),
year_planted = c(1978, 1972, 1973, 1977, 1975, 1978, 1976, 1976, 1915, 1979, 2014, 1974, 2015, 2015, 2015, 2015, 2015)
)
# Explanation of assumptions for certain surveys
# CHOA14 is the remnant prairie on the hill just northeast of Prairie Lake. Assuming "planted" 1915
# CHOA04 doesn't have a line in the spatial transect layer, but assuming it's the unit labeled Unit 4 in the burns layer, which it says was planted 1977.
# CHOA49 doesn't have a line in the spatial transect layer, but assuming it's the unit labeled CRO7 in the burns layer, which it says was planted 1979.
# CHOA52 doesn't have a line in the spatial transect layer, but assuming it's the unit labeled CRO8 in the burns layer, which it says was planted 2014.
# CHOA78 doesn't have a line in the spatial transect layer, but assuming it's the units labeled CRO22, CRO26A, & CRO27 in the burns layer, which it says was planted 1974.
# Calculate years since planting for each survey and disallow negative years since planted (make them 0).
survey_diversity_CHPR <- survey_diversity_CHPR %>%
left_join(year_planted, by = c("SurveyCode" = "Survey_ID")) %>%
mutate(years_since_planted = year - year_planted) %>%
mutate(years_since_planted = ifelse(years_since_planted < 0, 0, years_since_planted))
# Create a new variable 'era' based on when a unit was planted (20th vs 21st century).
survey_diversity_CHPR <- survey_diversity_CHPR %>%
mutate(era = ifelse(year_planted < 2015, "early", "late"))
# Generate dataframe representing mapping of survey IDs to burn unit IDs.
burn_unit_lookup <- data.frame(
Survey_ID = c("CHOA01", "CHOA02", "CHOA03", "CHOA04", "CHOA05", "CHOA06", "CHOA09", "CHOA10", "CHOA14", "CHOA49", "CHOA52", "CHOA78", "CHPC06", "CHPC07", "CHPC08", "CHPC09", "CHPC10"),
Burn_unit_ID_new_2015 = c("CRO 1", "CRO 2", "CRO 3", "CRO 4", "CRO 5", "CRO 6", "CRO 9", "CRO 10", "CRO 14", "CRO 49", "CRO 52", NA, "CRO 52", "CRO 40", "CRO 18", "CRO 29", "CRO 34")
)
# CHOA78 probably surveyed what is a combination of Burn_unit_ID_new_2015 c("CRO 7", "CRO 8", "CRO 57"). I'll probably just annoate this survey's time-since burn manually
# note CHPC08 straddles CRO 18 and CRO 19, but more situated in 18
# CHPC10 straddles CRO 34 and CRO 33, but more situated in 34
# Calculate the year last burned for every survey in survey_diversity_CHPR
survey_diversity_CHPR <- survey_diversity_CHPR %>%
left_join(burn_unit_lookup, by = c("SurveyCode" = "Survey_ID")) %>%
mutate(year_last_burned = NA)
for (i in 1:nrow(survey_diversity_CHPR)) {
survey_unit <- survey_diversity_CHPR$Burn_unit_ID_new_2015[i]
survey_year <- as.numeric(survey_diversity_CHPR$year[i])
burns_filtered <- burns %>%
filter(
Burn_unit_ID_new_2015 == survey_unit,
burn_status_simple == 1,
as.numeric(year) <= survey_year
)
survey_diversity_CHPR$year_last_burned[i] <- ifelse(is.infinite(max(burns_filtered$year)), NA, max(burns_filtered$year))
}
# Calculate years since burn from survey year
survey_diversity_CHPR <- survey_diversity_CHPR %>%
mutate(years_since_burn = year - year_last_burned)
# Define the desired response variable and its label based on the variable name
response_variable <- "prairie_richness"
response_variable_label <- case_when(
grepl("richness", response_variable, ignore.case = TRUE) ~ "Species richness",
grepl("simpson", response_variable, ignore.case = TRUE) ~ "Species diversity (Simpson index)",
grepl("shannon", response_variable, ignore.case = TRUE) ~ "Species diversity (Shannon index)",
TRUE ~ "Species diversity"
)
# Create a new variable 'n' with the response variable
survey_diversity_CHPR <- survey_diversity_CHPR %>%
mutate(n = get(response_variable))
# Adjust values for Simpson diversity to be > 0 and < 1 (beta distribution requirement)
if (grepl("simpson", response_variable, ignore.case = TRUE)) {
survey_diversity_CHPR$n[which(survey_diversity_CHPR$n <= 0)] <- 0.1
}
# # If modeling Shannon diversity we use a Gamma distribution, where values need to be > 0. Adjust zeroes slightly up.
# if(grepl("shannon", response_variable, ignore.case = TRUE)){
# survey_diversity_CHPR$n[which(survey_diversity_CHPR$n <= 0)] <- 0.1
# }
# For each survey get the previous year's diversity value.
survey_diversity_CHPR$previous_survey_n <- NA
for (i in 1:nrow(survey_diversity_CHPR)) {
surveys <- survey_diversity_CHPR %>%
filter(SurveyCode == survey_diversity_CHPR$SurveyCode[i]) %>%
arrange(SurveyDate)
survey_row <- which(surveys$SurveyDate == survey_diversity_CHPR$SurveyDate[i])
previous_survey_n <- surveys$n[survey_row - 1]
if (survey_row > 1) {
survey_diversity_CHPR[i, "previous_survey_n"] <- previous_survey_n
}
}Visualize data distribution
# Histogram with the distribution of surveys by years since site restored
ggplot(data = survey_diversity_CHPR, aes(x = years_since_planted)) +
geom_histogram(binwidth = 2, fill = "grey", color = "#e9ecef", alpha = 0.9) +
# geom_vline(aes(xintercept = 0), colour="maroon", size = 1.25) +
theme_clean() +
labs(
title = "Distribution of surveys by years since unit was planted",
subtitle = "Crow-Hassan Park Reserve (open areas)",
x = "Years since unit planted",
y = "Number of surveys"
) +
theme(
plot.title = element_text(size = 11, face = "bold"),
plot.subtitle = element_text(size = 10, face = "italic"),
legend.title = element_text(size = 10, face = "plain")
) +
scale_x_continuous(breaks = seq(-25, 125, by = 5))
# Histogram with the distribution of surveys by years since site burned
ggplot(data = survey_diversity_CHPR, aes(x = years_since_burn)) +
geom_histogram(binwidth = 1, fill = "grey", color = "#e9ecef", alpha = 0.9) +
theme_clean() +
labs(
title = "Distribution of surveys by years since unit was last burned",
subtitle = "Crow-Hassan Park Reserve (open areas)",
x = "Years since burn",
y = "Number of surveys"
) +
theme(
plot.title = element_text(size = 11, face = "bold"),
plot.subtitle = element_text(size = 10, face = "italic"),
legend.title = element_text(size = 10, face = "plain")
) +
scale_x_continuous(breaks = seq(0, 100, by = 1))Visualize relationship between diversity and covariates
# Plot species richness vs. year and save the plot
ggplot(data = survey_diversity_CHPR, aes(x = as.Date(SurveyDate, "%y-%m-%d"), y = n, color = SurveyCode)) +
geom_line(alpha = .3) +
geom_point(alpha = .3) +
geom_smooth(method = "lm", formula = y ~ x, se = FALSE) +
theme_clean() +
labs(
title = "Species richness over time",
subtitle = "Crow-Hassan Park Preserve",
x = "Year",
y = response_variable_label
) +
theme(
plot.title = element_text(size = 11, face = "bold"),
plot.subtitle = element_text(size = 10, face = "italic"),
legend.title = element_text(size = 10, face = "plain")
)
ggsave(path = here("Results", "994_Bird_modeling"), filename = paste0("raw_dat_year_", response_variable, ".jpg"), width = 10, height = 6)
# Plot species richness vs. year, faceted. Then save plot.
ggplot(data = survey_diversity_CHPR, aes(x = as.Date(SurveyDate, "%y-%m-%d"), y = n, color = SurveyCode)) +
geom_line(alpha = .3) +
geom_point(alpha = .3) +
facet_wrap(~SurveyCode) +
geom_smooth(method = "lm", formula = y ~ x, se = FALSE) +
# theme_clean() +
labs(
title = "Species richness over time",
subtitle = "Crow-Hassan Park Preserve",
x = "Year",
y = response_variable_label
) +
theme(
plot.title = element_text(size = 11, face = "bold"),
plot.subtitle = element_text(size = 10, face = "italic"),
legend.title = element_text(size = 10, face = "plain")
)
ggsave(path = here("Results", "994_Bird_modeling"), filename = paste0("raw_dat_facets_year_", response_variable, ".jpg"), width = 12, height = 6)
# Plot species richness vs. years since planted and save plot.
ggplot(data = survey_diversity_CHPR, aes(x = years_since_planted, y = n, color = SurveyCode)) +
geom_line(alpha = .3) +
geom_point(alpha = .3) +
geom_vline(aes(xintercept = 0), colour = "grey", size = 1, linetype = "dashed") +
geom_smooth(method = "lm", formula = y ~ x, se = FALSE) +
# facet_wrap(~SurveyCode) +
ylim(0, max(survey_diversity_CHPR$n)) +
theme_clean() +
labs(
title = "Species richness vs. years since restoration",
subtitle = "Crow-Hassan Park Preserve",
x = "Years since planted",
y = response_variable_label
) +
theme(
plot.title = element_text(size = 11, face = "bold"),
plot.subtitle = element_text(size = 10, face = "italic"),
legend.title = element_text(size = 10, face = "plain")
)
ggsave(path = here("Results", "994_Bird_modeling"), filename = paste0("raw_dat_years_since_planted_", response_variable, ".jpg"), width = 10, height = 6)
# Plot species richness vs. years since planted, faceted.
ggplot(data = survey_diversity_CHPR, aes(x = years_since_planted, y = n, color = SurveyCode)) +
geom_line(alpha = .3) +
geom_point(alpha = .3) +
geom_vline(aes(xintercept = 0), colour = "grey", size = 1, linetype = "dashed") +
geom_smooth() +
facet_wrap(~SurveyCode) +
ylim(0, max(survey_diversity_CHPR$n)) +
# theme_clean() +
labs(
title = "Species richness vs. years since restoration",
subtitle = "Crow-Hassan Park Preserve",
x = "Years since planted",
y = response_variable_label
) +
theme(
plot.title = element_text(size = 11, face = "bold"),
plot.subtitle = element_text(size = 10, face = "italic"),
legend.title = element_text(size = 10, face = "plain")
)
ggsave(path = here("Results", "994_Bird_modeling"), filename = paste0("raw_dat_facets_years_since_planted_", response_variable, ".jpg"), width = 12, height = 6)
# Plot species richness vs. total restored area on landscape
ggplot(data = survey_diversity_CHPR, aes(x = total_area_m2 / 4046.86, y = n, color = SurveyCode)) +
# geom_line(alpha= .3) +
geom_point(alpha = .3) +
geom_smooth(method = "lm", formula = y ~ x, se = FALSE) +
theme_clean() +
labs(
title = "Species richness vs. total restored area",
subtitle = "Crow-Hassan Park Preserve",
x = "Total restored area (acres)",
y = response_variable_label
) +
theme(
plot.title = element_text(size = 11, face = "bold"),
plot.subtitle = element_text(size = 10, face = "italic"),
legend.title = element_text(size = 10, face = "plain")
)
ggsave(path = here("Results", "994_Bird_modeling"), filename = paste0("raw_dat_total_area_", response_variable, ".jpg"), width = 10, height = 6)
# Plot species richness vs. total restored area on landscape, faceted
ggplot(data = survey_diversity_CHPR, aes(x = total_area_m2 / 4046.86, y = n, color = SurveyCode)) +
# geom_line(alpha= .3) +
geom_point(alpha = .3) +
facet_wrap(~SurveyCode) +
geom_smooth(method = "lm", formula = y ~ x, se = FALSE) +
# theme_clean() +
labs(
title = "Species richness vs. total restored area",
subtitle = "Crow-Hassan Park Preserve",
x = "Total restored area (acres)",
y = response_variable_label
) +
theme(
plot.title = element_text(size = 11, face = "bold"),
plot.subtitle = element_text(size = 10, face = "italic"),
legend.title = element_text(size = 10, face = "plain")
)
ggsave(path = here("Results", "994_Bird_modeling"), filename = paste0("raw_dat_facets_total_area_", response_variable, ".jpg"), width = 12, height = 6)
# Plot species richness vs. years since burned
ggplot(data = survey_diversity_CHPR, aes(x = years_since_burn, y = n, color = SurveyCode)) +
# geom_line(alpha= .3) +
geom_point(alpha = .3) +
geom_vline(aes(xintercept = 0), colour = "grey", size = 1, linetype = "dashed") +
geom_smooth(method = "lm", formula = y ~ x, se = FALSE) +
# facet_wrap(~SurveyCode) +
ylim(0, max(survey_diversity_CHPR$n)) +
theme_clean() +
labs(
title = "Species richness vs. years since last burn",
subtitle = "Crow-Hassan Park Preserve",
x = "Years since last burn",
y = response_variable_label
) +
theme(
plot.title = element_text(size = 11, face = "bold"),
plot.subtitle = element_text(size = 10, face = "italic"),
legend.title = element_text(size = 10, face = "plain")
)
ggsave(path = here("Results", "994_Bird_modeling"), filename = paste0("raw_dat_years_since_burned_", response_variable, ".jpg"), width = 10, height = 6)
# Plot species richness vs. years since burned, faceted
ggplot(data = survey_diversity_CHPR, aes(x = years_since_burn, y = n, color = SurveyCode)) +
# geom_line(alpha= .3) +
geom_point(alpha = .3) +
geom_vline(aes(xintercept = 0), colour = "grey", size = 1, linetype = "dashed") +
geom_smooth() +
facet_wrap(~SurveyCode) +
ylim(0, max(survey_diversity_CHPR$n)) +
# theme_clean() +
labs(
title = "Species richness vs. years since last burn",
subtitle = "Crow-Hassan Park Preserve",
x = "Years since last burn",
y = response_variable_label
) +
theme(
plot.title = element_text(size = 11, face = "bold"),
plot.subtitle = element_text(size = 10, face = "italic"),
legend.title = element_text(size = 10, face = "plain")
)
ggsave(path = here("Results", "994_Bird_modeling"), filename = paste0("raw_dat_facets_years_since_burned_", response_variable, ".jpg"), width = 12, height = 6)
# Plot species richness vs. precip
ggplot(data = survey_diversity_CHPR, aes(x = spring_precip_mm, y = n, color = SurveyCode)) +
# geom_line(alpha= .3) +
geom_point(alpha = .3) +
# geom_vline(aes(xintercept = 0), colour="grey", size = 1, linetype = "dashed") +
geom_smooth(method = "lm", formula = y ~ x, se = FALSE) +
# facet_wrap(~SurveyCode) +
ylim(0, max(survey_diversity_CHPR$n)) +
theme_clean() +
labs(
title = "Species richness vs. spring precipitation",
subtitle = "Crow-Hassan Park Preserve",
x = "Spring precipitation (mm)",
y = response_variable_label
) +
theme(
plot.title = element_text(size = 11, face = "bold"),
plot.subtitle = element_text(size = 10, face = "italic"),
legend.title = element_text(size = 10, face = "plain")
)
ggsave(path = here("Results", "994_Bird_modeling"), filename = paste0("raw_dat_precip_", response_variable, ".jpg"), width = 10, height = 6)
# Plot species richness vs. years since burned, faceted
ggplot(data = survey_diversity_CHPR, aes(x = spring_precip_mm, y = n, color = SurveyCode)) +
# geom_line(alpha= .3) +
geom_point(alpha = .3) +
# geom_vline(aes(xintercept = 0), colour="grey", size = 1, linetype = "dashed") +
geom_smooth() +
facet_wrap(~SurveyCode) +
ylim(0, max(survey_diversity_CHPR$n)) +
# theme_clean() +
labs(
title = "Species richness vs. spring precipitation",
subtitle = "Crow-Hassan Park Preserve",
x = "Spring precipitation (mm)",
y = response_variable_label
) +
theme(
plot.title = element_text(size = 11, face = "bold"),
plot.subtitle = element_text(size = 10, face = "italic"),
legend.title = element_text(size = 10, face = "plain")
)
ggsave(path = here("Results", "994_Bird_modeling"), filename = paste0("raw_dat_facets_precip_", response_variable, ".jpg"), width = 12, height = 6)# Rescale variables for modeling
survey_diversity_CHPR$total_area_ha <- survey_diversity_CHPR$total_area_m2 / 10000
survey_diversity_CHPR$total_area_ha_sc <- scale(survey_diversity_CHPR$total_area_ha)
survey_diversity_CHPR$total_area_acres <- survey_diversity_CHPR$total_area_m2 / 4046.86
survey_diversity_CHPR$total_area_acres_sc <- scale(survey_diversity_CHPR$total_area_acres)
survey_diversity_CHPR$total_area_m2_sc <- scale(survey_diversity_CHPR$total_area_m2)
survey_diversity_CHPR$year_sc <- scale(survey_diversity_CHPR$year)
survey_diversity_CHPR$spring_precip_mm_sc <- scale(survey_diversity_CHPR$spring_precip_mm)
survey_diversity_CHPR$years_since_planted_sc <- scale(survey_diversity_CHPR$years_since_planted)
survey_diversity_CHPR$years_since_burn_sc <- scale(survey_diversity_CHPR$years_since_burn)
survey_diversity_CHPR$TranLength_sc <- scale(survey_diversity_CHPR$TranLength)
survey_diversity_CHPR$previous_survey_n_sc <- scale(survey_diversity_CHPR$previous_survey_n)
survey_diversity_CHPR$year_planted_sc <- scale(survey_diversity_CHPR$year_planted)# Fit model with diversity of response variable, a fixed effect for yer, and random effects for site and site-year (early on a few surveys were repeated two or three in one year). The response distribution depends on the diveristy metric. Species richness uses a Poisson distribution, since it is modeling species counts. Simpson is essentially a probability (constrained form 0 to 1), so it uses a Beta distribution. Shannon uses a Gaussian distribution (I originally used a Gamma distribution, since negative values aren't possible, but model diagnostics on the normal distribution were fine).
glmmTMB.mod0 <- glmmTMB(n ~ year_sc + (1 | SurveyCode) + (1 | site_year),
data = survey_diversity_CHPR,
family = case_when(
grepl("richness", response_variable, ignore.case = TRUE) ~ "poisson",
grepl("simpson", response_variable, ignore.case = TRUE) ~ "beta",
grepl("shannon", response_variable, ignore.case = TRUE) ~ "gaussian",
TRUE ~ "error"
)
)
# Print summary of the fitted model
summary(glmmTMB.mod0)## Family: poisson ( log )
## Formula: n ~ year_sc + (1 | SurveyCode) + (1 | site_year)
## Data: survey_diversity_CHPR
##
## AIC BIC logLik deviance df.resid
## 1159.4 1173.4 -575.7 1151.4 241
##
## Random effects:
##
## Conditional model:
## Groups Name Variance Std.Dev.
## SurveyCode (Intercept) 8.258e-02 2.874e-01
## site_year (Intercept) 7.420e-10 2.724e-05
## Number of obs: 245, groups: SurveyCode, 15; site_year, 213
##
## Conditional model:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.81103 0.08530 21.231 < 2e-16 ***
## year_sc 0.13796 0.02859 4.825 1.4e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Calculate the change in diversity over time as a percentage
predict_temporal_change_in_diversity <- predict(glmmTMB.mod0,
newdata = data.frame(year_sc = c(min(survey_diversity_CHPR$year_sc), max(survey_diversity_CHPR$year_sc))),
re.form = NA, se.fit = TRUE, type = "response"
)
percentage_change <- (predict_temporal_change_in_diversity$fit[2] - predict_temporal_change_in_diversity$fit[1]) / predict_temporal_change_in_diversity$fit[1] * 100
print(percentage_change)## mu_predict
## 60.38663
Species richness in a typical unit has increased by ~60% since 1978.
# This model includes all available covariates that we thought might influence diversity. Years since burn is fit as a quadratic (we expect diversity to increase and then decrease with time since burn). Model includes a first-order autoregressive term to account for temporal autocorrelation (value of previous survye included as a covariate).
glmmTMb.maximal_model <- glmmTMB(n ~ total_area_acres_sc + year_sc + years_since_planted_sc + spring_precip_mm_sc + years_since_burn_sc + I(years_since_burn_sc^2) + previous_survey_n_sc + (1 | SurveyCode) + (1 | site_year), data = survey_diversity_CHPR, family = case_when(
grepl("richness", response_variable, ignore.case = TRUE) ~ "poisson",
grepl("simpson", response_variable, ignore.case = TRUE) ~ "beta",
grepl("shannon", response_variable, ignore.case = TRUE) ~ "gaussian",
TRUE ~ "error"
))
summary(glmmTMb.maximal_model)## Family: poisson ( log )
## Formula: n ~ total_area_acres_sc + year_sc + years_since_planted_sc +
## spring_precip_mm_sc + years_since_burn_sc + I(years_since_burn_sc^2) +
## previous_survey_n_sc + (1 | SurveyCode) + (1 | site_year)
## Data: survey_diversity_CHPR
##
## AIC BIC logLik deviance df.resid
## 1048.7 1082.8 -514.3 1028.7 214
##
## Random effects:
##
## Conditional model:
## Groups Name Variance Std.Dev.
## SurveyCode (Intercept) 1.729e-02 1.315e-01
## site_year (Intercept) 4.315e-10 2.077e-05
## Number of obs: 224, groups: SurveyCode, 13; site_year, 197
##
## Conditional model:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.983896 0.054612 36.33 < 2e-16 ***
## total_area_acres_sc 0.099377 0.044699 2.22 0.026200 *
## year_sc -0.127488 0.060384 -2.11 0.034748 *
## years_since_planted_sc 0.169598 0.045776 3.70 0.000211 ***
## spring_precip_mm_sc -0.005499 0.024265 -0.23 0.820710
## years_since_burn_sc 0.085167 0.033717 2.53 0.011540 *
## I(years_since_burn_sc^2) -0.035239 0.017077 -2.06 0.039057 *
## previous_survey_n_sc 0.060969 0.032825 1.86 0.063255 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Version with an interaction between years since planted and era (do new restorations gain species faster than old ones?).
glmmTMb.maximal_model_int <- glmmTMB(n ~ year_sc + total_area_acres_sc + years_since_planted_sc * era + spring_precip_mm_sc + years_since_burn_sc + I(years_since_burn_sc^2) + previous_survey_n_sc + (1 | SurveyCode) + (1 | site_year), data = survey_diversity_CHPR, family = case_when(
grepl("richness", response_variable, ignore.case = TRUE) ~ "poisson",
grepl("simpson", response_variable, ignore.case = TRUE) ~ "beta",
grepl("shannon", response_variable, ignore.case = TRUE) ~ "gaussian",
TRUE ~ "error"
))
summary(glmmTMb.maximal_model_int)## Family: poisson ( log )
## Formula: n ~ year_sc + total_area_acres_sc + years_since_planted_sc *
## era + spring_precip_mm_sc + years_since_burn_sc + I(years_since_burn_sc^2) +
## previous_survey_n_sc + (1 | SurveyCode) + (1 | site_year)
## Data: survey_diversity_CHPR
##
## AIC BIC logLik deviance df.resid
## 1045.5 1086.5 -510.8 1021.5 212
##
## Random effects:
##
## Conditional model:
## Groups Name Variance Std.Dev.
## SurveyCode (Intercept) 8.042e-03 8.968e-02
## site_year (Intercept) 3.212e-10 1.792e-05
## Number of obs: 224, groups: SurveyCode, 13; site_year, 197
##
## Conditional model:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 2.380271 0.156747 15.185 <2e-16 ***
## year_sc 0.583262 0.280469 2.080 0.0376 *
## total_area_acres_sc 0.111403 0.044592 2.498 0.0125 *
## years_since_planted_sc -0.574006 0.286230 -2.005 0.0449 *
## eralate -1.448405 0.969730 -1.494 0.1353
## spring_precip_mm_sc -0.006595 0.024390 -0.270 0.7868
## years_since_burn_sc 0.087092 0.033765 2.579 0.0099 **
## I(years_since_burn_sc^2) -0.038037 0.017225 -2.208 0.0272 *
## previous_survey_n_sc 0.051715 0.033551 1.541 0.1232
## years_since_planted_sc:eralate 0.630819 0.431089 1.463 0.1434
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
From the main model (without interaction term), time since restoration has a large positive effect on richness. Total area also has a positive association. Fire is also important, with a significant quadratic effect (richness at first increases but then decreases with time since burn). After controlling for other variables, time alone has a fairly strong negative effect on observed richness. How to interpret this? In absence of restoration we’d expect richness to be declining? Spring precipitation had no measurable association with richness.
Model with interaction changes the sign of a few predictors. Year because positive. I suppose negative effect of year in the main model could be driven by new restorations that came online in later years with low initial diversity. Once the effect of this is captured with the era term effect of time is positive. Conditional effect of time to since restoration becomes negative… Interaction between time since restoration and era not significant.
# Specify the model for diagnostics
model_for_diagnostics_and_me <- "glmmTMb.maximal_model"
# Get the names of fixed predictors and preprocess them
predictors <- find_parameters(get(model_for_diagnostics_and_me), "fixed")$conditional
predictors <- predictors[!predictors %in% grep("[(]", predictors, value = TRUE)]
predictors <- unlist(strsplit(predictors, ":"))
# Create a version of the dataset without rows containing missing covariate values
modeled_dat <- survey_diversity_CHPR %>%
select(SurveyCode, SurveyDate, all_of(predictors)) %>%
na.omit()
# Set the output path for diagnostic plot
output_path <- here("Results", "994_Bird_modeling", paste0("diagnostic_plot_", model_for_diagnostics_and_me, "_", response_variable, ".png"))
# Save the diagnostic plot as a PNG file
png(output_path)
simulationOutput <- simulateResiduals(fittedModel = get(model_for_diagnostics_and_me), n = 5000, plot = TRUE)
dev.off()## png
## 2
# These generate same plots as above, but actually print them for the markdown output.
plotQQunif(simulationOutput)# Loop through each predictor to create individual residual plots
for (i in 1:length(predictors)) {
# Set the output path for each residual plot
output_path <- here("Results", "994_Bird_modeling", paste0("residual_plot_", model_for_diagnostics_and_me, "_", response_variable, "_", predictors[i], ".png"))
# Save each residual plot as a PNG file
png(output_path)
plotResiduals(simulationOutput, modeled_dat %>% pull(predictors[i]))
dev.off()
}
for (i in 1:length(predictors)) {
plotResiduals(simulationOutput, modeled_dat %>% pull(predictors[i]))
}In general the model checks all look good. The residuals for the autoregressive term have an undesired pattern (predictions are too low at high previous diversity values), though it doesn’t look terrible to me.
me_plot <- function(mod) {
# Get fixed predictors from the model
predictors <- find_parameters(mod, "fixed")$conditional # get names of fixed terms
predictors <- predictors[!predictors %in% grep("[(]", predictors, value = T)] # drop the intercept and any quadratic terms by searching for parantheses
predictors <- unlist(strsplit(predictors, ":"))
# Initialize a list to store individual marginal effect plots
me_plots <- vector("list", length = length(predictors))
for (i in 1:length(predictors)) {
# Extract the current predictor
pred <- gsub("_sc", "", predictors[i])
pred.name.sc <- paste0(pred, "_sc")
# Set the number of steps for the sequence
pred.steps <- 500
# Generate a sequence of predictor values across the observed range
pred.vals <- seq(
min(survey_diversity_CHPR[[pred]], na.rm = TRUE),
max(survey_diversity_CHPR[[pred]], na.rm = TRUE),
length.out = pred.steps
)
# Scale predicted values by mean and standard deviation used to fit the model
pred.vals.scale <- (pred.vals - mean(survey_diversity_CHPR[[pred]], na.rm = TRUE)) /
sd(survey_diversity_CHPR[[pred]], na.rm = TRUE)
# Create a data frame for predicting response across predictor values
pred.df <- data.frame(matrix(0, ncol = length(predictors), nrow = pred.steps))
colnames(pred.df) <- predictors
pred.df[pred.name.sc] <- pred.vals.scale
pred.df[pred] <- pred.vals
# Predict response across predictor values at mean values of all other variables
out.pred <- predict(mod, pred.df, re.form = NA, se.fit = TRUE, type = "response")
pred.df <- bind_cols(pred.df, out.pred)
# Plot marginal effect
me_plot_out <- ggplot(pred.df, aes_string(x = pred, y = "fit")) +
geom_ribbon(aes(ymin = fit - 1.96 * se.fit, ymax = fit + 1.96 * se.fit), alpha = 0.2) +
geom_line(size = 1.1) +
ggthemes::theme_clean() +
labs(x = to_any_case(pred, "sentence"), y = response_variable_label)
me_plots[[i]] <- me_plot_out
}
# Combine individual plots in a grid
library(gridExtra)
n <- length(me_plots)
nCol <- floor(sqrt(n))
do.call("grid.arrange", c(me_plots, ncol = nCol))
}
# Generate marginal effect plot for the full model
full_model_plot <- me_plot(get(model_for_diagnostics_and_me))# Save the plot as a JPG file
ggsave(plot = full_model_plot, path = here("Results", "994_Bird_modeling"), filename = paste0("marginal_effects_", model_for_diagnostics_and_me, "_", response_variable, ".jpg"), width = 10, height = 10)
# Generate marginal effect plot for the year-only model
year_only_model_plot <- me_plot(glmmTMB.mod0)# Extract data for CH open area surveys
dat_chp <- bind_rows(bird.tblBirds_Transects, bird.tblBirds_PointCounts) %>%
rowwise() %>%
mutate(count = max(BirdCountIn, BirdCount05, BirdCount10, na.rm = TRUE)) %>%
ungroup() %>%
filter(count > 0) %>%
left_join(bird.tblSurveyDefn, by = join_by(SurveyCode)) %>%
mutate(
year = as.numeric(sub("-.*", "", SurveyDate)) # Convert survey date to numeric year
) %>%
filter(ParkCode == "CHPR" & HabCode == "OA")
# Table of unique observers
chp_surveys <- unique(dat_chp$SurveyCode)
chp_surveyors <- filter(bird.tblSurveyConditions, SurveyCode %in% chp_surveys) %>%
mutate(year = format(SurveyDate, format = "%Y")) %>%
group_by(Surveyor) %>%
summarize(
Total_surveys = n(),
Total_years = length(unique(year)),
Min_year = min(year),
Max_year = max(year)
) %>%
arrange(desc(Total_surveys)) %>%
mutate(
cumulative_surveys = cumsum(Total_surveys),
pct_surveys = cumulative_surveys / max(cumulative_surveys)
)
head(chp_surveyors)## [1] 13
# Surveyors for main CH transects
chp_trans_surveyors <- filter(bird.tblSurveyConditions, SurveyCode %in% c("CHOA01", "CHOA02", "CHOA09", "CHBB02", "CHBB09", "CHBB01")) %>%
mutate(year = format(SurveyDate, format = "%Y")) %>%
group_by(Surveyor) %>%
summarize(
Total_surveys = n(),
Total_years = length(unique(year)),
Min_year = min(year),
Max_year = max(year)
) %>%
arrange(desc(Total_surveys)) %>%
mutate(
cumulative_surveys = cumsum(Total_surveys),
pct_surveys = cumulative_surveys / max(cumulative_surveys)
)
head(chp_trans_surveyors)## [1] 8
# Larry's survey years
larry_years <- filter(bird.tblSurveyConditions, SurveyCode %in% c("CHOA01", "CHOA02", "CHOA09", "CHBB02", "CHBB09", "CHBB01")) %>%
filter(Surveyor == "LNG") %>%
mutate(year = format(SurveyDate, format = "%Y")) %>%
pull(year) %>%
unique() %>%
sort()
larry_years## [1] "1979" "1981" "1982" "1983" "1984" "1985" "1986" "1987" "1988" "1989" "1990" "1991" "1992"
## [14] "1993" "1994" "1995" "1996" "1997" "1998" "1999" "2000" "2001" "2002" "2003" "2004" "2005"
## [27] "2006" "2007" "2008" "2009" "2010" "2011" "2012"
# Steven's survey years
steven_years <- filter(bird.tblSurveyConditions, SurveyCode %in% c("CHOA01", "CHOA02", "CHOA09", "CHBB02", "CHBB09", "CHBB01")) %>%
filter(Surveyor == "STH") %>%
mutate(year = format(SurveyDate, format = "%Y")) %>%
pull(year) %>%
unique() %>%
sort()
steven_years## [1] "2010" "2013" "2014" "2015" "2017" "2018" "2019" "2021" "2022" "2023"
# Total surveys
total_surveys <- dat_chp %>%
group_by(SurveyCode, SurveyDate) %>%
count() %>%
nrow()
total_surveys## [1] 301
## [1] 45
# Total species
full_sp <- bird.tblBirdSpecies.ebird %>%
filter(category == "species") %>%
pull(BirdCode) %>%
unique()
chp_sp_counts <- dat_chp %>%
filter(BirdCode %in% full_sp) %>%
group_by(BirdCode) %>%
summarize(total_count = sum(count)) %>%
arrange(desc(total_count))
total_species <- nrow(chp_sp_counts)
total_species## [1] 100
# Total prairie species
chp_sp_counts_prairie_sp <- dat_chp %>%
filter(BirdCode %in% full_sp) %>%
filter(BirdCode %in% prairie_sp) %>%
group_by(BirdCode) %>%
summarize(total_count = sum(count)) %>%
arrange(desc(total_count))
total_prairie_species <- nrow(chp_sp_counts_prairie_sp)
total_prairie_species## [1] 43
## [1] 6825
# Compare species richness over time in early and later restorations. New restorations possibly accumulaitng species faster?
ggplot(survey_diversity_CHPR, aes(x = years_since_planted, y = n, color = era, group = SurveyCode)) +
geom_point() +
geom_smooth(method = "lm", formula = y ~ x) +
theme_clean()## R version 4.2.2 (2022-10-31 ucrt)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 22621)
##
## Matrix products: default
##
## locale:
## [1] LC_COLLATE=English_United States.utf8 LC_CTYPE=English_United States.utf8
## [3] LC_MONETARY=English_United States.utf8 LC_NUMERIC=C
## [5] LC_TIME=English_United States.utf8
##
## attached base packages:
## [1] parallel stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] snakecase_0.11.1 DHARMa_0.4.6 glmmTMB_1.1.8 performance_0.10.8
## [5] insight_0.19.7 gridExtra_2.3 jagsUI_1.5.2 png_0.1-8
## [9] transformr_0.1.3 gifski_1.12.0-2 gganimate_1.0.8 ggspatial_1.1.9
## [13] ggrepel_0.9.4 RVAideMemoire_0.9-83-7 pairwiseAdonis_0.4.1 cluster_2.1.4
## [17] goeveg_0.6.5 vegan_2.6-4 lattice_0.20-45 permute_0.9-7
## [21] mapview_2.11.2 ggtext_0.1.2 ratelimitr_0.4.1 rvest_1.0.3
## [25] trelliscopejs_0.2.6 plotly_4.10.3 auk_0.7.0 readxl_1.4.3
## [29] kableExtra_1.3.4 ggthemes_5.0.0 forcats_1.0.0 stringr_1.5.1
## [33] purrr_1.0.2 readr_2.1.4 tidyr_1.3.0 tibble_3.2.1
## [37] ggplot2_3.4.4 tidyverse_2.0.0 lubridate_1.9.3 dplyr_1.1.4
## [41] DT_0.31 sf_1.0-14 rgdal_1.6-4 sp_2.1-2
## [45] RODBC_1.3-23 here_1.0.1
##
## loaded via a namespace (and not attached):
## [1] utf8_1.2.4 lme4_1.1-35.1 tidyselect_1.2.0
## [4] htmlwidgets_1.6.4 grid_4.2.2 lpSolve_5.6.19
## [7] munsell_0.5.0 codetools_0.2-18 ragg_1.2.6
## [10] units_0.8-5 withr_2.5.2 colorspace_2.1-0
## [13] qgam_1.3.4 highr_0.10 knitr_1.45
## [16] uuid_1.1-1 rstudioapi_0.15.0 stats4_4.2.2
## [19] wk_0.9.1 Rdpack_2.6 emmeans_1.8.9
## [22] labeling_0.4.3 DistributionUtils_0.6-1 bit64_4.0.5
## [25] farver_2.1.1 gap.datasets_0.0.6 rprojroot_2.0.4
## [28] TH.data_1.1-2 coda_0.19-4 vctrs_0.6.5
## [31] generics_0.1.3 xfun_0.41 timechange_0.2.0
## [34] doParallel_1.0.17 R6_2.5.1 markdown_1.12
## [37] fields_15.2 cachem_1.0.8 assertthat_0.2.1
## [40] promises_1.2.1 scales_1.3.0 vroom_1.6.4
## [43] multcomp_1.4-25 nnet_7.3-18 gtable_0.3.4
## [46] leafpop_0.1.0 spam_2.10-0 sandwich_3.0-2
## [49] rlang_1.1.2 systemfonts_1.0.5 splines_4.2.2
## [52] rebird_1.3.0 TMB_1.9.9 lazyeval_0.2.2
## [55] rjags_4-15 brew_1.0-8 checkmate_2.3.0
## [58] s2_1.1.4 yaml_2.3.7 unmarked_1.3.2
## [61] crosstalk_1.2.1 backports_1.4.1 httpuv_1.6.12
## [64] Hmisc_5.1-1 gridtext_0.1.5 tools_4.2.2
## [67] MCMCvis_0.16.3 ellipsis_0.3.2 raster_3.6-26
## [70] jquerylib_0.1.4 proxy_0.4-27 Rcpp_1.0.11
## [73] plyr_1.8.9 base64enc_0.1-3 progress_1.2.3
## [76] classInt_0.4-10 prettyunits_1.2.0 rpart_4.1.19
## [79] pbapply_1.7-2 cowplot_1.1.1 zoo_1.8-12
## [82] ggh4x_0.2.6 leafem_0.2.3 magrittr_2.0.3
## [85] data.table_1.14.8 mvtnorm_1.2-4 hms_1.1.3
## [88] mime_0.12 evaluate_0.23 xtable_1.8-4
## [91] leaflet_2.2.1 mclust_6.0.1 compiler_4.2.2
## [94] maps_3.4.1.1 KernSmooth_2.23-20 crayon_1.5.2
## [97] minqa_1.2.6 htmltools_0.5.7 mgcv_1.8-41
## [100] later_1.3.1 tzdb_0.4.0 Formula_1.2-5
## [103] DBI_1.1.3 tweenr_2.0.2 autocogs_0.1.4
## [106] MASS_7.3-58.1 boot_1.3-28 Matrix_1.6-4
## [109] cli_3.6.1 rbibutils_2.2.16 epuRate_0.1
## [112] dotCall64_1.1-1 pkgconfig_2.0.3 numDeriv_2016.8-1.1
## [115] foreign_0.8-83 terra_1.7-55 foreach_1.5.2
## [118] xml2_1.3.5 svglite_2.1.2 bslib_0.6.1
## [121] webshot_0.5.5 estimability_1.4.1 digest_0.6.33
## [124] rmarkdown_2.25 cellranger_1.1.0 htmlTable_2.4.2
## [127] gap_1.5-3 shiny_1.8.0 commonmark_1.9.0
## [130] satellite_1.0.4 nloptr_2.0.3 lifecycle_1.0.4
## [133] nlme_3.1-160 jsonlite_1.8.7 viridisLite_0.4.2
## [136] fansi_1.0.5 pillar_1.9.0 survival_3.4-0
## [139] fastmap_1.1.1 httr_1.4.7 glue_1.6.2
## [142] iterators_1.0.14 leaflet.providers_2.0.0 bit_4.0.5
## [145] class_7.3-20 stringi_1.8.2 sass_0.4.7
## [148] textshaping_0.3.7 e1071_1.7-13
By Sam Safran